library(tidyverse)
library(broom)
library(parsnip)
library(tidymodels)
library(ggplot2)
library(ggiraph)
library(plotly)
library(gganimate)
library(gifski)
Main research question: What are the driving factors for renewable energy production for different countries, and what do these trends look like over time in comparison to fossil fuel usage and emissions?
Sub questions: 1. What countries are producing the most renewable energy? And, how does this compare to the capacity of each country to do so?
What are the main sources of renewable energy for each country?
How does each country’s renewable energy output compare with their greenhouse gas emissions?
Does renewable energy prominence correlate in any way with a country’s GDP?
Is the rate at which we are producing renewable energy increasing? How does the trend relate to our rate of producing emissions?
What countries are in the most need of renewable energy based on population and consumption?
Which countries are leaders in solar, wind, and nuclear power?
What do future trends look like for renewable energy?
How do renewable energy trends vary by continent?
The dataset we have chosen is World Energy Consumption, a dataset from Kaggle consisting of key metrics of energy usage (primary energy, per capita, growth rates, energy mix, electricity mix, and other metrics). This data set is part of Our World in Data, which seeks to collect data and research the world's largest problems. Currently this data set contains 122 columns of variables and over 17,000 country-year observations, covering each country from 1900 to the present. There are many observations which have “NA” as their entry, so it is likely we will need to clean the data quite a bit, or focus on some specific countries of interest with complete data.
# Read the full World Energy Consumption table from the project data folder,
# then print a column-by-column preview of what was loaded.
alldata <- read_csv(file = "/cloud/project/data/WorldEnergyConsumption.csv")
alldata %>%
  glimpse()
## Rows: 17,432
## Columns: 122
## $ iso_code <chr> "AFG", "AFG", "AFG", "AFG", "A…
## $ country <chr> "Afghanistan", "Afghanistan", …
## $ year <dbl> 1900, 1901, 1902, 1903, 1904, …
## $ coal_prod_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_prod_change_twh <dbl> NA, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ gas_prod_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_prod_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_prod_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_prod_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ energy_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ energy_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_cons_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ carbon_intensity_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_cons_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_production <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coal_prod_per_capita <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ electricity_generation <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ biofuel_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ coal_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewable_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewable_exc_biofuel_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ energy_per_gdp <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_fuel_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_cons_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ fossil_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_production <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gas_prod_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ hydro_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ low_carbon_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ nuclear_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_production <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ oil_prod_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewables_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewables_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewables_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewables_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewables_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewable_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ other_renewables_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ per_capita_electricity <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ population <dbl> 5021241, 5053439, 5085403, 511…
## $ primary_energy_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ renewables_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ solar_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ gdp <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_share_elec <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_cons_change_pct <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_share_energy <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_cons_change_twh <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_consumption <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_elec_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_energy_per_capita <dbl> NA, NA, NA, NA, NA, NA, NA, NA…
# Build the working table: keep rows from 1990 onwards for the selected
# countries/regions, and keep only the identifier columns plus the
# consumption, share, change and per-capita measures for each energy source.
# The column order below is the order the columns appear in `cleandata`.
cleandata <- alldata %>%
  filter(year >= 1990) %>%
  filter(country %in% c("United States", "Canada", "United Kingdom", "Spain", "France", "Germany", "Switzerland", "Italy", "Norway", "Sweden", "Finland", "China", "Brazil", "India", "Russia", "Japan", "New Zealand", "Australia", "Africa", "South Africa")) %>%
  select(
    # identifiers
    country, year,
    # wind
    wind_energy_per_capita, wind_consumption, wind_share_energy,
    wind_cons_change_twh,
    # economy + solar
    gdp,
    solar_energy_per_capita, solar_consumption, solar_share_energy,
    solar_cons_change_twh,
    # all renewables
    renewables_energy_per_capita, renewables_consumption,
    renewables_cons_change_twh, renewables_share_energy,
    # demographics
    population,
    # nuclear
    nuclear_consumption, nuclear_share_energy, nuclear_cons_change_twh,
    nuclear_energy_per_capita,
    # hydro
    hydro_share_energy, hydro_cons_change_twh, hydro_energy_per_capita,
    hydro_consumption,
    # fossil fuels
    fossil_cons_change_twh, fossil_fuel_consumption,
    fossil_energy_per_capita, fossil_share_energy,
    # total energy
    energy_cons_change_twh, energy_per_gdp, energy_per_capita,
    # biofuel
    biofuel_cons_change_twh, biofuel_share_energy, biofuel_cons_per_capita,
    biofuel_consumption
  )

# Print the resulting tibble.
cleandata
## # A tibble: 619 × 35
## country year wind_energy_per_capita wind_consumption wind_share_energy
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Africa 1990 0 0 0
## 2 Africa 1991 0 0 0
## 3 Africa 1992 0 0 0
## 4 Africa 1993 0.008 0.006 0
## 5 Africa 1994 0.021 0.015 0.001
## 6 Africa 1995 0.024 0.018 0.001
## 7 Africa 1996 0.027 0.02 0.001
## 8 Africa 1997 0.027 0.02 0.001
## 9 Africa 1998 0.027 0.021 0.001
## 10 Africa 1999 0.069 0.055 0.002
## # … with 609 more rows, and 30 more variables: wind_cons_change_twh <dbl>,
## # gdp <dbl>, solar_energy_per_capita <dbl>, solar_consumption <dbl>,
## # solar_share_energy <dbl>, solar_cons_change_twh <dbl>,
## # renewables_energy_per_capita <dbl>, renewables_consumption <dbl>,
## # renewables_cons_change_twh <dbl>, renewables_share_energy <dbl>,
## # population <dbl>, nuclear_consumption <dbl>, nuclear_share_energy <dbl>,
## # nuclear_cons_change_twh <dbl>, nuclear_energy_per_capita <dbl>, …
Something that interests our group is the growth rate of renewable energy sources over time. We could, for example, use time as the predictor variable and solar energy per capita as the outcome variable, and attempt to predict the future trend of solar energy growth. We can also do this for wind and nuclear energy sources, and compare their growth and projected growth.
# United States only, 1965 onwards: the fossil, renewable and nuclear shares
# of energy, listed in chronological order.
alldata %>%
  filter(year >= 1965) %>%
  filter(country == "United States") %>%
  select(year, fossil_share_energy, renewables_share_energy, nuclear_share_energy) %>%
  arrange(year)
## # A tibble: 56 × 4
## year fossil_share_energy renewables_share_energy nuclear_share_energy
## <dbl> <dbl> <dbl> <dbl>
## 1 1965 95.9 4.05 0.073
## 2 1966 96.0 3.86 0.105
## 3 1967 95.6 4.21 0.141
## 4 1968 95.8 4.01 0.217
## 5 1969 95.5 4.25 0.229
## 6 1970 95.6 4.07 0.347
## 7 1971 95.1 4.27 0.593
## 8 1972 95.0 4.18 0.801
## 9 1973 94.8 4.03 1.19
## 10 1974 93.8 4.54 1.66
## # … with 46 more rows
This table shows us the trajectory of energy shares in the United States from 1965 to the present (the data for these categories begins in 1965). The trend shows that the fossil fuel share has gone down over time, while the renewable and nuclear shares have risen. This is a good starting point that would be interesting to visualize, especially alongside other countries.
# Line chart of per-capita solar energy for six countries of interest.
# The x axis is limited to 2000-2020, so observations outside that window are
# dropped at draw time rather than filtered out of the data beforehand.
alldata %>%
  filter(country %in% c("United States", "United Kingdom", "France",
                        "Germany", "China", "Russia")) %>%
  ggplot(aes(x = year, y = solar_energy_per_capita,
             color = country, fill = country)) +
  geom_line() +
  xlim(2000, 2020) +
  theme_minimal() +
  labs(
    title = "The Development of Solar Energy in the Past 20 Years",
    subtitle = "Per capita, selected countries of interest",
    x = "Year ",
    y = "Solar Energy Per Capita (kWh)"
  )
## Warning: Removed 606 row(s) containing missing values (geom_path).
This plot shows the per capita solar energy generated by China, France, Germany, Russia, the UK, and the US. The data is shown per capita to normalize for the different sizes of the countries. Per capita, Germany leads the pack in solar energy production. This is also a good starting point, and it would be interesting to see how these and other countries perform in other areas of renewables.
In the beginning, we will likely be looking at trends over time and comparing to different variables. Metrics like population, GDP, and per capita consumption will be interesting to look at for different countries over different timescales, especially for different subsets of energy types. We will be able to predict trends within different confidence intervals by inserting lines of best fit. As we learn more skills in R, we will be able to expand our data analysis and visualizations. One of the visualizations our group is eager to learn is leaflets, so we can create interactive maps, colored global or continental maps.
When visualizing renewable energy growth and projected growth, correlation coefficients and lines of best fit will be necessary. Based on the preliminary visualization, it looks like solar energy will continue to grow noticeably in each country except for Russia. A line of best fit will help quantify exactly how much projected growth there is for each country.
Poudel, P, World Energy Consumption: Consumption of energy by different countries, electronic dataset, viewed 28 February 2022, https://www.kaggle.com/pralabhpoudel/world-energy-consumption
# Fossil fuel consumption by year for the United States and China.
#
# Membership is tested with `%in%`, which checks every row against the whole
# set of names. Comparing with `==` against a two-element vector instead
# recycles that vector down the column ("United States", "China",
# "United States", ...), so a row is kept only when its position happens to
# line up with its own country name. That silently drops rows for both
# countries and raises a "longer object length is not a multiple of shorter
# object length" warning.
fossildata <- cleandata %>%
  select(fossil_fuel_consumption, year, country) %>%
  filter(country %in% c("United States", "China"))
# Scatter plot of yearly fossil fuel consumption, coloured by country, with a
# smoothed trend line per country (confidence band switched off).
fossildata %>%
  ggplot(aes(x = year, y = fossil_fuel_consumption, color = country)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    title = "Fossil Fuel Consumption for China and the United States",
    subtitle = "By year",
    x = "Year",
    y = "Fossil Fuel Consumption (TWh)",
    color = "Country"
  ) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
# Re-base the year so that 1990 becomes year 0; the model intercept then
# refers to 1990 rather than to calendar year 0.
year_1990_fossil_fuel_consump <- mutate(fossildata, year = year - 1990)

# Linear regression (lm engine) of fossil fuel consumption on the re-based
# year, the country, and their interaction, so each country gets its own
# intercept and slope.
m1 <- fit(
  set_engine(linear_reg(), "lm"),
  fossil_fuel_consumption ~ year * factor(country),
  data = year_1990_fossil_fuel_consump
)

# Prediction grid for China: re-based years 0 through 33 (1990 through 2023).
new_data <- data.frame(year = 0:33, country = "China")
#new_data <- new_data %>%
#mutate(predicted = predict(m1, new_data), actual = year_1990_fossil_fuel_consump$fossil_fuel_consumption)
# Wind consumption from 2000 onwards for every country in `cleandata`.
wind_consumption_data <- cleandata %>%
  select(wind_consumption, year, country) %>%
  filter(year >= 2000)

# One semi-transparent smoothed trend line per country (no confidence band).
wind_consumption_graph <- wind_consumption_data %>%
  ggplot(aes(x = year, y = wind_consumption, color = country)) +
  stat_smooth(geom = "line", alpha = 0.5, se = FALSE) +
  labs(
    title = "Wind Consumption in the Past 20 Years",
    subtitle = "For select countries of interest",
    x = "Year",
    y = "Primary Energy from Wind (TWh)"
  ) +
  theme_minimal()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(wind_consumption_graph)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
# Per-capita wind energy from 2000 onwards for every country in `cleandata`.
wind_energy_per_capita_data <- cleandata %>%
  filter(year >= 2000) %>%
  select(wind_energy_per_capita, year, country)

# One semi-transparent smoothed trend line per country (no confidence band).
# The y axis is labelled in kWh: the per-capita columns are per-person
# quantities, and kWh is the unit this report already uses for the
# per-capita solar line chart. TWh applies only to the country-total
# consumption columns.
wind_energy_per_capita_graph <- ggplot(data = wind_energy_per_capita_data,
                                       mapping = aes(x = year,
                                                     y = wind_energy_per_capita,
                                                     color = country)) +
  stat_smooth(geom = "line", alpha = 0.5, se = FALSE) +
  theme_minimal() +
  labs(title = "Wind Energy per Capita in the Past 20 Years",
       subtitle = "For select countries of interest",
       x = "Year",
       y = "Primary Energy from Wind per Capita (kWh)")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(wind_energy_per_capita_graph)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
# Solar consumption from 2000 onwards for every country in `cleandata`.
solar_consumption_data <- cleandata %>%
  select(solar_consumption, year, country) %>%
  filter(year >= 2000)

# One semi-transparent smoothed trend line per country (no confidence band).
solar_consumption_graph <- solar_consumption_data %>%
  ggplot(aes(x = year, y = solar_consumption, color = country)) +
  stat_smooth(geom = "line", alpha = 0.5, se = FALSE) +
  labs(
    title = "Solar Consumption in the Past 20 Years",
    subtitle = "For select countries of interest",
    x = "Year",
    y = "Primary Energy from Solar (TWh)"
  ) +
  theme_minimal()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(solar_consumption_graph)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
# Per-capita solar energy from 2000 onwards for every country in `cleandata`.
solar_energy_per_capita_data <- cleandata %>%
  filter(year >= 2000) %>%
  select(solar_energy_per_capita, year, country)

# One semi-transparent smoothed trend line per country (no confidence band).
# The y axis is labelled in kWh so that it agrees with the earlier line chart
# of this same `solar_energy_per_capita` column, which is labelled
# "Solar Energy Per Capita (kWh)". TWh applies only to the country-total
# consumption columns.
solar_energy_per_capita_graph <- ggplot(data = solar_energy_per_capita_data,
                                        mapping = aes(x = year,
                                                      y = solar_energy_per_capita,
                                                      color = country)) +
  stat_smooth(geom = "line", alpha = 0.5, se = FALSE) +
  theme_minimal() +
  labs(title = "Solar Energy per Capita in the Past 20 Years",
       subtitle = "For select countries of interest",
       x = "Year",
       y = "Primary Energy from Solar per Capita (kWh)")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(solar_energy_per_capita_graph)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).